Loading Data

### Delete if runs
# project_dir <- file.path("~/Desktop/tmp/wastewater-communities")
# metadata_dir <- file.path(project_dir, "/")
# 
# dat_files <- c(
#    env_meta = file.path(metadata_dir, "env_meta.csv"),
#    seq_meta = file.path(metadata_dir, "seq_meta.csv"),
#    id_key = file.path(metadata_dir, "id_keys.csv"),
#    merge_id = file.path(metadata_dir, "merge_id_key.csv"),
#    key_meta = file.path(metadata_dir, "key_meta.csv"),
#    mr_blast_file = file.path(project_dir, "wastewater_blast.rdata")
# )
# 
# # metadata
# meta_env_src <- read_csv(dat_files['env_meta'])
# 
# 
# # Loading blast based OTU count data.
# load(dat_files['mr_blast_file'])
# 
# # Only including spray irrigation and treatment plant data in analysis.
# plantObj <- which((pData(wastewaterMRobj_blast)$study_type == "plant" &
#                   pData(wastewaterMRobj_blast)$Stage_name != "Influent post screening") |
#                       pData(wastewaterMRobj_blast)$Region_Site == "MA_SI1") %>% 
#     {wastewaterMRobj_blast[,.]} %>% cumNorm(p = 0.75)
# 
# ww_meta <- plantObj %>% pData() %>% 
#       as.data.frame %>% mutate(Seq_ID = row.names(.)) %>% 
#       separate(Env_ID,c("Region","TP","Date","Stage"), sep = "_") %>% 
#       unite("WW_TP", Region,TP, sep = "_", remove = F)  %>%
#       mutate(plant_name = paste(ifelse(Region == "MA", 
#                                        "Mid-Atlantic",
#                                        "Midwest"),
#                                 ifelse(TP %in% c("TP1","TP2"),
#                                        paste0("WW",TP),TP))) %>% 
#       select(Seq_ID, WW_TP, plant_name, Region, TP, Date, Stage, Stage_name, 
#              study_type, Season, StageID, StageID_all_plants)
# rownames(ww_meta) <- ww_meta$Seq_ID

plantObj <- readRDS("plantObj.rds")
ww_meta <- pData(plantObj) 

Results Seq Data

count_dat <- metagenomeSeq::expSummary(plantObj)
count_dat$Seq_ID <- rownames(count_dat)
count_df <- left_join(ww_meta, count_dat)

Seq count info

count_tbl <- plantObj@assayData$counts %>% as_tibble() 
count_tbl$OTU <- plantObj@featureData@data$OTU
count_tbl <- count_tbl %>% gather("sam","count",-OTU) %>% 
    mutate(count = if_else(count != 0, 1,0)) %>% 
    group_by(OTU) %>% summarise(n_sam = sum(count)) %>% 
    filter(n_sam == 1)
assigned_un_df <- count_tbl %>% left_join(plantObj@featureData@data) %>% 
    mutate(assinged_un = if_else(grepl("OTU_",Taxonomy), "unassigned","assigned")) %>% 
    group_by(assinged_un) %>% summarise(count = n())

# total seq count
count_total <- sum(count_df$libSize)
# total number of samples
num_samples <- nrow(count_df)

# average seq count per sample
avg_seq_sample <- count_total/num_samples

# number of unique assigned species level OTUs
n_unique_assigned_otus <- assigned_un_df$count[assigned_un_df$assinged_un == "assigned"]
# number of unique unassigned species level OTUs
n_unique_unassigned_otus <- assigned_un_df$count[assigned_un_df$assinged_un == "unassigned"]

Total sequence count: 6.11383710^{6}
Total number of samples: 65
Average sequence count per sample : 9.40590310^{4}
Unique OTUs are defined as OTUs only present in one sample.
Total number of unique assigned species OTUs: 1521 Total number of unique unassigned species OTUs: 347

sample_count_summary <- count_df %>% select(WW_TP, Stage) %>% 
    group_by(WW_TP, Stage) %>% 
    summarize(count = n())
sample_count_summary$count %>% sum()
## [1] 65
sample_count_summary %>% kable()
WW_TP Stage count
MA_SI1 AfterUVTreatment 7
MA_SI1 BeforeUVTreatment 6
MA_SI1 HoldingPondInlet 7
MA_SI1 PumpHouseInlet 8
MA_TP1 ActivatedSludgeReactor 2
MA_TP1 Effluent 3
MA_TP1 RawInfluent 4
MA_TP1 SecondaryClarifier 2
MA_TP2 ActivatedSludgeReactor 2
MA_TP2 Effluent 1
MA_TP2 RawInfluent 2
MA_TP2 SecondaryClarifier 2
MW_TP1 Effluent 3
MW_TP1 PostAeration 2
MW_TP1 RawInfluent 3
MW_TP1 SecondaryClarifier 2
MW_TP2 CellB 4
MW_TP2 Effluent 3
MW_TP2 RawInfluent 2

Supplemental Figure 1 - Coverage Analysis

SC- Hill number based coverage estimate, extrapolated rarefaction curve, considering the singleton to tentons (not sure this is a real word)? Coverage is the estimated OTU coverage calculated by dividing the number of observed OTUs by the Chao1 diversity estimate.
Chao1, is based on singletons and likely overestimates species diversity for 16S data. Therefore Chao1 based coverage estimates are conservative. Filtering samples with less than 100 sequences.

div_est <- plantObj  %>% MRcounts()  %>% DataInfo()
div_est$StageID <- factor(count_df$StageID)
p <- ggplot(div_est, aes(x = n, y = SC)) +
      geom_point(aes(color = StageID)) + scale_x_log10() + 
      geom_vline(aes(xintercept = 100), linetype = 2, color = "grey 60") + 
      theme_bw() +
      labs(x = "Number of Sequences",y = "Coverage") +
      theme(legend.position = "bottom") 
ggMarginal(p, type = "histogram", margins = "x")
Number of observed sequences compared to the estimated coverage. The vertical dotted line indicates the cut off for sample excluded from the analysis. Histogram at the top show the distribution of samples relative to the number of sequences per sample.

Number of observed sequences compared to the estimated coverage. The vertical dotted line indicates the cut off for sample excluded from the analysis. Histogram at the top show the distribution of samples relative to the number of sequences per sample.

Excluding samples with less than 100 counts.

plantObj <- which(colSums(plantObj) > 100) %>% {plantObj[, .]} 
## creating phyloseq object
plant_phy <-plantObj %>% MRexperiment2biom(norm = TRUE,log = TRUE) %>% 
    import_biom2()
colnames(plant_phy@tax_table) <- c("taxa_OTU","Taxonomy","Kingdom",
                                   "Phylum","Class","Order",
                                   "Family","Genus","Species")
plant_phy@tax_table <- plant_phy@tax_table[,-2]

Alpha Diversity Calculations

Calculating Diversity Metrics Diversity metrics were calculated using OTU level count data.

waste_counts <- plantObj %>% MRcounts(norm =TRUE, log = TRUE)
ww_div <- data_frame(sampleID = colnames(waste_counts),
                     shannon = diversity(waste_counts, MARGIN = 2),
                     simpson = diversity(waste_counts, MARGIN = 2, index = "simpson"),
                     specnumber = specnumber(waste_counts,MARGIN = 2)) %>% 
    gather(diversity, metric, -sampleID)


ww_div_df <- ww_meta %>% 
    mutate(Stage = ifelse(Stage %in% c("RawInfluent","PostScreeningInfluent"), 
                          "Influent",Stage)) %>% 
    right_join(ww_div, by = c("Seq_ID" ="sampleID"))

Influents

Supplemental Figure 2 - Alpha diversity

ww_div_df %>% filter(Stage == "Influent") %>% 
    ggplot() + geom_point(aes(x = plant_name, y = metric, color = plant_name)) + 
        facet_wrap(~diversity, scale = "free_y") + 
        theme_bw() + theme(axis.text.x = element_blank(), legend.position = "bottom") +
        labs(x = "Wastewater Treatment Plant", y = "Diversity Metric", color = "")

Testing for Differences

Initially using a parametric model, ANOVA, to test for differences in alpha diversity by wastewater treatment plants. Based on the diagnostic plots the the residuals are not normally distributed with long tails. Due to the non-normally distributed residuals the non-parametric Kruskal-Wallis test was also used to test for statistically significant differences. The alpha diversity is not statistically significant when comparing influents among treatment plants.

ww_inf_df <- ww_div_df %>% filter(Stage == "Influent")

fit_aov <- function(df) aov(metric~WW_TP, data = df)
test_kruskal <- function(df) kruskal.test(df$metric,factor(df$WW_TP))


inf_div_test <- ww_inf_df %>% group_by(diversity) %>% nest() %>% 
    mutate(aov_mod = map(data, fit_aov), krusk = map(data, test_kruskal))

aov_summary <- inf_div_test$aov_mod %>% map_df(glance) %>% 
    rename(f_stat = statistic, f_p = p.value) 
aov_summary$diversity <- inf_div_test$diversity

krusk_summary <- inf_div_test$krusk %>% map_df(glance) %>% 
    rename(kruskal = statistic, kruk_p = p.value) %>% 
    select(-parameter, -method)
krusk_summary$diversity <- inf_div_test$diversity

inf_div_test_tbl <- aov_summary %>% 
    select(diversity, logLik, AIC,BIC, f_stat, f_p) %>% left_join(krusk_summary)

inf_div_test_tbl %>% kable()
diversity logLik AIC BIC f_stat f_p kruskal kruk_p
shannon -8.003281 26.00656 27.51949 0.4840213 0.7055600 2.900000 0.4073016
simpson 51.631243 -93.26249 -91.74956 0.5866456 0.6456501 2.900000 0.4073016
specnumber -77.682358 165.36472 166.87764 0.6685194 0.6015781 2.836364 0.4175487

ANOVA Diagnostic plots

div_metrics <- c("shannon","simpson","specnumber")
for(i in 1:3){
    print(autoplot(inf_div_test$aov_mod[[i]]) + 
              ggtitle(paste0(div_metrics[i])))
}

Figure 1 - Relative Abundance

mrexp_inf <- plantObj[, which(pData(plantObj)$Stage_name %in% 
                                  c("Raw influent", 
                                    "Influent post screening"))]
mrexp_inf <- aggregateByTaxonomy(mrexp_inf, 
                                 lvl = "genus", 
                                 alternate = TRUE, 
                                 norm = TRUE, log = TRUE)

pData(mrexp_inf)$plant_name <- pData(mrexp_inf)$plant_name %>% factor()

mrexp_inf <- cumNorm(mrexp_inf, p = 0.75)
mrexp_inf <- filterData(mrexp_inf, present = 7, depth = 1)
# mrexp_inf <- cumNorm(mrexp_inf, p = 0.75)
s <- normFactors(mrexp_inf)
pd <- pData(mrexp_inf)

settings <- zigControl(maxit = 1, verbose = FALSE)
mod <- model.matrix(~plant_name, data = pd)
colnames(mod) <- levels(pd$plant_name)
res = fitZig(obj <- mrexp_inf, mod = mod, control = settings)
zigFit = res$fit
fit2 = eBayes(zigFit)
top_tbl <- topTableF(fit2,p.value = 0.075) 
top_feat <- top_tbl %>% rownames()
count_tbl <- MRcounts(mrexp_inf)
count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
count_df <- count_tbl %>% as.data.frame() %>% 
    rownames_to_column(var = "genus") %>% 
    gather("Seq_ID", "Count",-genus) %>% left_join(pd)

Differentially abundance genus between treatment plant influent samples. Might want to exclude MW_TP2 since there is only one sample.

kable(top_tbl)
Mid.Atlantic.WWTP1 Mid.Atlantic.WWTP2 Midwest.WWTP1 Midwest.WWTP2 scalingFactor AveExpr F P.Value adj.P.Val
Streptococcus 3.7106349 -2.2729514 -1.4019061 -1.5863193 7.881069 5.117024 296.3466 0e+00 3e-07
Bifidobacterium 4.2305666 -1.6500646 -0.8484295 -1.8457110 5.031820 5.019435 242.1323 0e+00 3e-07
Paracoccus -0.2321056 -1.2775004 0.0920867 0.2259361 12.279543 3.364678 182.6427 0e+00 7e-07
Faecalibacterium 2.6022519 -1.7300736 -0.3179924 -1.2048628 4.864957 3.546670 177.8089 0e+00 7e-07
Clostridium 3.8715052 2.3704363 0.4583797 2.2030953 3.137254 5.674791 153.5457 0e+00 8e-07
Blautia 3.9436406 -2.4829157 -0.3428441 -4.4061591 3.114230 3.867834 185.0868 0e+00 8e-07
Eubacterium 2.6387443 -2.4973135 -1.0345646 -2.2384306 5.285341 3.241547 150.3832 0e+00 8e-07
Propionibacterium 0.8938151 -0.1300985 0.5111170 -0.9144029 7.847056 3.359342 139.4012 0e+00 9e-07
Lactococcus 2.6804230 -1.2802727 -0.7181955 -1.8288987 3.957832 3.251467 138.0501 0e+00 9e-07
Lachnoclostridium 1.7521640 0.5113022 -0.2986833 -1.7004446 5.761949 3.378824 111.2265 1e-07 2e-06
count_df %>% 
    # mutate(plant_name = str_replace(plant_name, "Mid-Atlantic ", "MA-"),
    #        plant_name = str_replace(plant_name, "Midwest ", "MW-")) %>% 
ggplot() + 
    geom_point(aes(x = plant_name, y = Count, color = plant_name)) + 
    geom_line(aes(x = plant_name, y = Count, color = plant_name)) + 
    facet_wrap(~genus,nrow = 2) +
    theme_bw() + scale_y_log10() + labs(y = "Relative Abundance", 
                                        color = "Treatment Plant",
                                        x = "Treatment Plant") + 
    theme(legend.position = "bottom", 
          axis.text.x = element_text(angle = 90),
          strip.text =element_text(face = "italic"))

Influent-Effluent Pairs

Need to check regarding limited number of effluent samples ….

Supplemental Figure 3 - Alpha diversity

ww_div_inf_eff <- ww_div_df %>% filter(Stage %in% c("Influent","Effluent")) %>% 
    group_by(plant_name, Stage, Date, diversity) %>% 
    summarise(metric = median(metric))
ww_div_inf_eff %>% ungroup() %>% 
    mutate(Stage = factor(Stage, levels = c("Influent","Effluent")),
           Date = paste(plant_name, Date)) %>% 
    ggplot(aes(x = Stage, y = metric)) +
        geom_path(aes(group = Date), color = "grey60") +
        geom_point(aes(x = Stage, y = metric, color = plant_name)) +
        facet_wrap(~diversity, scale = "free_y") +
        labs(y = "Diversity Metric", color = "WWTP") + theme_bw() +
      theme(legend.position = "bottom")

Testing for differences

Using a paired t-test to compared the alpha diversity between influent and effluent samples, specnumber (number of observed OTUs) and shannon’s diversity index were significantly higher for influent compared to effluent. Shannon’s diversity index is close to the significance threshold of 0.05, and when accounting for multiple comparisons Shannon’s diversity index is no longer significant. The number of observed OTUs is biased by sequencing depth, as the effluent samples consistently have fewer reads than influent samples the observed difference is likely at least partially due to differences in sequencing depth.

inf_eff_paired <- ww_div_inf_eff %>% group_by(plant_name, Date, Stage, diversity) %>% 
    summarize(metric = median(metric)) %>% spread(Stage, metric) %>%
    filter(!is.na(Influent), !is.na(Effluent))

inf_v_ef_test_df <- data_frame()
for(i in c("shannon","simpson","specnumber")){
    inf_v_ef_test_df <- inf_eff_paired %>% 
        filter(diversity == i) %>% 
        {t.test(.$Influent, .$Effluent, paired = TRUE, alternative = "greater")} %>% 
        tidy() %>% mutate(diversity = i) %>% bind_rows(inf_v_ef_test_df)
}
inf_v_ef_test_df %>% select(diversity, estimate, statistic, p.value, parameter, conf.low, conf.high) %>% kable()
diversity estimate statistic p.value parameter conf.low conf.high
specnumber 1273.9000000 4.716943 0.0045962 4 698.1542891 Inf
simpson 0.0030069 1.228177 0.1433526 4 -0.0022124 Inf
shannon 0.9979746 2.104947 0.0515381 4 -0.0127536 Inf

Relative Abundance

mrexp_inf_eff <- plantObj[, which(pData(plantObj)$Stage_name %in% 
                                  c("Raw influent", 
                                    "Influent post screening","Effluent"))]
mrexp_inf_eff <- aggregateByTaxonomy(mrexp_inf_eff, 
                                 lvl = "genus", 
                                 alternate = TRUE, 
                                 norm = TRUE, log = TRUE)

pData(mrexp_inf_eff)$plant_name <- pData(mrexp_inf_eff)$plant_name %>% factor()
pData(mrexp_inf_eff)$Stage_name <- pData(mrexp_inf_eff)$Stage_name %>% 
      ifelse(. %in% c("Raw influent","Influent post screening"),"Influent",.) %>% 
      factor(levels = c("Influent","Effluent"))

mrexp_inf_eff <- cumNorm(mrexp_inf_eff, p = 0.75)
mrexp_inf_eff <- filterData(mrexp_inf_eff, present = 7, depth = 1)
# mrexp_inf_eff <- cumNorm(mrexp_inf_eff, p = 0.75)
s <- normFactors(mrexp_inf_eff)
pd <- pData(mrexp_inf_eff)

settings = zigControl(maxit = 1, verbose = FALSE)
mod = model.matrix(~Stage_name, data = pd)
colnames(mod) = levels(pd$Stage_name)
res = fitZig(obj = mrexp_inf_eff, mod = mod, control = settings)
zigFit = res$fit
fit2 <- eBayes(zigFit)
top_tbl <- topTableF(fit2,p.value = 0.075) 
top_feat <- top_tbl %>% rownames()
count_tbl <- MRcounts(mrexp_inf_eff)
count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
count_df <- count_tbl %>% as.data.frame() %>% 
    rownames_to_column(var = "genus") %>% 
    gather("Seq_ID", "Count",-genus) %>% left_join(pd) %>% 
    mutate(plant_date = paste0(Date, plant_name))
kable(top_tbl)
Influent Effluent scalingFactor AveExpr F P.Value adj.P.Val
Trichococcus 5.7475672 -3.9840536 0.4676418 4.121937 111.78295 0 0
Leucobacter 2.1108296 -3.1165125 5.5592729 3.060460 97.49628 0 0
Hyphomicrobium -0.1621981 2.1289837 3.5435518 2.229523 95.93467 0 0
Pseudomonas 10.7783844 -1.8564032 -5.5222311 7.379846 83.00545 0 0
Streptococcus 3.9734565 -3.4186268 3.6933844 3.874773 83.89276 0 0
Mycobacterium 1.2411825 0.0716067 6.4994120 3.838430 82.40745 0 0
Bifidobacterium 3.3391492 -4.3308347 5.4268273 3.532881 84.68735 0 0
Clostridium 3.5044912 -3.8787399 7.0094263 4.699553 70.43494 0 0
Lachnoclostridium 1.3140308 -3.8193525 6.6686699 2.374673 82.82946 0 0
Propionibacterium 1.2261816 -3.7892910 6.8894766 2.374481 71.41008 0 0

Figure 2

ggplot(count_df) + 
    geom_path(aes(x = Stage_name, y = Count, group = plant_date), 
              color = "grey60") + 
    geom_point(aes(x = Stage_name, y = Count, color = plant_name)) + 
    facet_wrap(~genus, nrow = 2) +
    theme_bw() + scale_y_log10() + 
    labs(y = "Relative Abundance", x = "Stage Name", color = "Treatment Plant") +
    theme(legend.position = "bottom", 
          strip.text = element_text(face = "italic"))

Beta Diversity

Description of metrics based on Biological Diversity frontiers in measurement and assessment. Edited by Anne E. Magurran and Brian McGill

\(\Beta\) diversity was calculated using two different metrics.

Bray Curtis \(S_{BC}\)

  • Bray Curtis is an abundance based similarity measure.

\[ S_{BC} = 1 - \frac{\sum_{i=1}^S |M_{i1} - M_{i2}|}{\sum_{i=1}^S (M_{i1} + M_{i2})} \]

  • \(M_{i1}\) and \(M_{i2}\) are the abundance of OTU \(i\) in sample 1 and 2 respectively.
  • \(S\) is the total number of OTUs in samples 1 and 2.

  • Confounds diversity with compositional similarity
  • Impact of different sample sizes
    • metric approaches 0 with increasingly different sample sizes even for samples with the same or different composition
    • Normalization should address this issue
  • Impact of different coverage (sampling fraction)
    • for different coverage samples the metric “becomes meaningless and performs erratically” (Chao et al 2006)

Chao et al. 2006 Abundance-based similarity indices and their estimatation when there are unseen species in samples. Biometrics, 62, 361-371

Jaccard \(S_J\)
* Jaccard is an incidence based (presence/absence) \(\Beta\) diversity measure.

\[ S_J = \frac{S_{12}}{(S_1 + S_2 - S_{12})} \]

  • \(S_{1}\) - total observed OTUs in sample 1
  • \(S_{2}\) - total observed OTUs in sample 2
  • \(S_{12}\) - total observed OTUs in samples 1 and 2
  • Incidence based measures assume nearly complete coverage

Section Objective
Look at all the samples together. Then look for interesting results and for these interesting results look at statistically significant differences.

For post-hoc comaprison most differences are spray irrigation stages.

Ordination Methods

  • Both nMDS and PCoA do not require the input distances in euclidean space.
  • nMDS obtains a euclidean representation of input non-euclidean data using a non-linear function.
  • PCoA is only able to provide a euclidean representation of the Euclidean part of the distance matric. The non-Euclidean part of the distance matric is not represented in the ordination.

Notes from Numerical Ecology Legendre and Legendra 3rd Edition p. 493

Summary of NMDS https://jonlefcheck.net/2012/10/24/nmds-tutorial-in-r/

Statistical Tests
ANOSIM - analysis of similarities, testing for similarities between groups, rank order of dissimilarity value based statistical test.
No post-hoc test for ANOSIM, testing for pairwise differences using betadisper.
Betadisper is an implementation of PERMDISP2, multivariate analgoue to Levene’s test for variance homogeneity.
Evaluates the variance or distance of samples from group centroid.

dist_methods <- unlist(distanceMethodList)[c(8,10)]

Treatment Process Comparison

pl <- subset_samples(plant_phy, study_type == "plant")

pl_pcoa_list <- list()
pl_nmds_list <- list()
for (i in dist_methods) {
    # Calculate distance matrix
    iDist <- distance(pl, method = i)
    # Calculate ordination
    pl_pcoa_list[[i]] <- ordinate(pl, method = "PCoA", distance = iDist)
    pl_nmds_list[[i]] <- ordinate(pl, method = "NMDS", distance = iDist)
}

Figure 3 - Ordination Plots

# plot_ordination(pl, pl_pcoa_list[[1]], color= "Stage", title = "Bray Curtis PCoA") + theme_bw() + stat_ellipse()
# plot_ordination(pl, pl_nmds_list[[1]], color= "Stage", title = "Bray Curtis NMDS") + theme_bw() + stat_ellipse()
# stressplot(pl_nmds_list[[1]])

Jaccard

# plot_ordination(pl, pl_pcoa_list[[2]], color= "Stage", title = "Jaccard PCoA") + theme_bw()
# plot_ordination(pl, pl_nmds_list[[2]], color= "Stage", title = "Jaccard NMDS") + theme_bw()
# stressplot(pl_nmds_list[[2]])

Testing for Differences

iDist <- distance(pl, method = "bray")
anosim(iDist,grouping = factor(pl@sam_data$Stage))
## 
## Call:
## anosim(dat = iDist, grouping = factor(pl@sam_data$Stage)) 
## Dissimilarity: bray 
## 
## ANOSIM statistic R: 0.5632 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999

Treatment plant stages are significantly different testing for pairwise differences

## betadisper has a post-hoc test
iDist <- distance(pl, method = "bray")
beta_fit <- betadisper(iDist,group = factor(pl@sam_data$Stage))
anova(beta_fit) %>% tidy() %>% kable()
term df sumsq meansq statistic p.value
Groups 5 0.3117790 0.0623558 11.31555 1.09e-05
Residuals 24 0.1322551 0.0055106 NA NA
TukeyHSD(beta_fit)$group %>% as.data.frame() %>% 
    rownames_to_column(var = "comparison") %>% 
    filter(`p adj` < 0.075) %>% kable(digits = 3)
comparison diff lwr upr p adj
PostAeration-ActivatedSludgeReactor -0.529 -0.785 -0.272 0.000
PostAeration-CellB -0.399 -0.680 -0.118 0.002
PostAeration-Effluent -0.568 -0.812 -0.325 0.000
RawInfluent-PostAeration 0.489 0.249 0.730 0.000
SecondaryClarifier-PostAeration 0.483 0.231 0.734 0.000

Spray Irrigation Stage Comparison

si <- subset_samples(plant_phy, study_type == "spray")
si_pcoa_list <- list()
si_nmds_list <- list()
for (i in dist_methods) {
    print(i)
    # Calculate distance matrix
    iDist <- distance(si, method = i)
    # Calculate ordination
    si_pcoa_list[[i]] <- ordinate(si, method = "PCoA", distance = iDist)
    si_nmds_list[[i]] <- ordinate(si, method = "NMDS", distance = iDist)
}
## [1] "bray"
## Run 0 stress 0.08628405 
## Run 1 stress 0.08628405 
## ... Procrustes: rmse 9.610914e-07  max resid 2.007411e-06 
## ... Similar to previous best
## Run 2 stress 0.3829659 
## Run 3 stress 0.2558146 
## Run 4 stress 0.08628405 
## ... Procrustes: rmse 2.892117e-06  max resid 9.106018e-06 
## ... Similar to previous best
## Run 5 stress 0.08628405 
## ... New best solution
## ... Procrustes: rmse 1.062147e-06  max resid 1.871165e-06 
## ... Similar to previous best
## Run 6 stress 0.09761258 
## Run 7 stress 0.1074574 
## Run 8 stress 0.1601557 
## Run 9 stress 0.09761272 
## Run 10 stress 0.08628405 
## ... Procrustes: rmse 2.545378e-06  max resid 4.930609e-06 
## ... Similar to previous best
## Run 11 stress 0.0964332 
## Run 12 stress 0.1600244 
## Run 13 stress 0.1074574 
## Run 14 stress 0.1663974 
## Run 15 stress 0.09643206 
## Run 16 stress 0.08628405 
## ... Procrustes: rmse 8.360176e-07  max resid 1.547145e-06 
## ... Similar to previous best
## Run 17 stress 0.1609997 
## Run 18 stress 0.1074574 
## Run 19 stress 0.09636965 
## Run 20 stress 0.0976128 
## *** Solution reached
## [1] "jaccard"
## Run 0 stress 0.08628405 
## Run 1 stress 0.08628405 
## ... New best solution
## ... Procrustes: rmse 1.580754e-06  max resid 5.108713e-06 
## ... Similar to previous best
## Run 2 stress 0.09761348 
## Run 3 stress 0.3856545 
## Run 4 stress 0.08628405 
## ... Procrustes: rmse 3.412486e-06  max resid 1.333437e-05 
## ... Similar to previous best
## Run 5 stress 0.09761607 
## Run 6 stress 0.1074674 
## Run 7 stress 0.08628405 
## ... Procrustes: rmse 2.537932e-06  max resid 6.172675e-06 
## ... Similar to previous best
## Run 8 stress 0.1601557 
## Run 9 stress 0.09761644 
## Run 10 stress 0.09628348 
## Run 11 stress 0.1617694 
## Run 12 stress 0.08628405 
## ... Procrustes: rmse 2.067053e-06  max resid 5.999207e-06 
## ... Similar to previous best
## Run 13 stress 0.08628405 
## ... Procrustes: rmse 1.855991e-06  max resid 6.65927e-06 
## ... Similar to previous best
## Run 14 stress 0.09636983 
## Run 15 stress 0.08628405 
## ... Procrustes: rmse 4.220688e-06  max resid 1.290558e-05 
## ... Similar to previous best
## Run 16 stress 0.1601557 
## Run 17 stress 0.08628405 
## ... New best solution
## ... Procrustes: rmse 6.392372e-07  max resid 2.221082e-06 
## ... Similar to previous best
## Run 18 stress 0.08628405 
## ... Procrustes: rmse 1.161473e-06  max resid 3.827379e-06 
## ... Similar to previous best
## Run 19 stress 0.09625276 
## Run 20 stress 0.08628405 
## ... Procrustes: rmse 1.961568e-06  max resid 4.142391e-06 
## ... Similar to previous best
## *** Solution reached

Ordination Plots

Bray Curtis

#plot_ordination(si, si_pcoa_list[[1]], color= "Stage", title = "Bray Curtis PCoA") + theme_bw()
#plot_ordination(si, si_nmds_list[[1]], color= "Stage", title = "Bray Curtis NMDS") + theme_bw()
stressplot(si_nmds_list[[1]])

Jaccard

#plot_ordination(si, si_pcoa_list[[2]], color= "Stage", title = "Jaccard PCoA") + theme_bw()
#plot_ordination(si, si_nmds_list[[2]], color= "Stage", title = "Jaccard NMDS") + theme_bw()
stressplot(si_nmds_list[[2]])

Testing for Differences by Stage

Spray irrigation stages are significantly different.

## betadisper has a post-hoc test
iDist <- distance(pl, method = "bray")
beta_fit <- betadisper(iDist,group = factor(pl@sam_data$Stage))
anova(beta_fit) %>% tidy() %>% kable()
term df sumsq meansq statistic p.value
Groups 5 0.3117790 0.0623558 11.31555 1.09e-05
Residuals 24 0.1322551 0.0055106 NA NA
TukeyHSD(beta_fit)$group %>% as.data.frame() %>% 
    rownames_to_column(var = "comparison") %>% 
    filter(`p adj` < 0.075) %>% kable(digits = 3)
comparison diff lwr upr p adj
PostAeration-ActivatedSludgeReactor -0.529 -0.785 -0.272 0.000
PostAeration-CellB -0.399 -0.680 -0.118 0.002
PostAeration-Effluent -0.568 -0.812 -0.325 0.000
RawInfluent-PostAeration 0.489 0.249 0.730 0.000
SecondaryClarifier-PostAeration 0.483 0.231 0.734 0.000

Treatment Processes

Supplemental Figure 4 - Alpha diversity

ww_div_treat <- ww_div_df %>% filter(study_type == "plant") %>% 
    group_by(plant_name, Stage, StageID, Date, diversity) %>% 
    summarise(metric = median(metric))
ww_div_treat %>% ungroup() %>% 
    mutate(Stage = factor(Stage,levels = c("Influent","ActivatedSludgeReactor", 
                                           "PostAeration","SecondaryClarifier",
                                           "CellB","Effluent")), 
           Date = paste(plant_name, Date)) %>% 
    ggplot(aes(x = Stage, y = metric)) +
        geom_line(aes(group = Date), color = "grey60") +
        geom_point(aes(x = Stage, y = metric)) +
        facet_grid(diversity~plant_name, scale = "free") +
        labs(y = "Diversity Metric", color = "WWTP") + theme_bw() +
        theme(axis.text.x = element_text(angle = 90))

Testing for Differences

Test for stage differences only performed for Mid-Atlantic WWTP1, only treatment plant with replicates at each stage. None of the stages are significantly different from each other.

fit_aov <- function(df) aov(metric~Stage, data = df)
treat_div_test <- ww_div_treat %>% filter(plant_name == "Mid-Atlantic WWTP1") %>% 
    group_by(diversity) %>% nest() %>% mutate(aov_mod = map(data, fit_aov))

aov_summary <- treat_div_test$aov_mod %>% map_df(glance) %>% 
    rename(f_stat = statistic, f_p = p.value) 
aov_summary$diversity <- inf_div_test$diversity

aov_summary %>% select(diversity, logLik, AIC,BIC, f_stat, f_p) %>% kable()
diversity logLik AIC BIC f_stat f_p
shannon 2.735477 4.529047 6.518523 0.8182523 0.5236262
simpson 76.543857 -143.087714 -141.098238 0.5831169 0.6447918
specnumber -80.400346 170.800692 172.790168 0.8403800 0.5135556

ANOVA Diagnostic plots

div_metrics <- c("shannon","simpson","specnumber")
for(i in 1:3){
    print(autoplot(treat_div_test$aov_mod[[i]]) + 
              ggtitle(paste0(div_metrics[i])))
}

Relative Abundance

mrexp <- aggregateByTaxonomy(plantObj, 
                                 lvl = "genus", 
                                 alternate = TRUE, 
                                 norm = TRUE, log = TRUE)
pData(mrexp)$plant_name <- pData(mrexp)$plant_name %>% factor()
pData(mrexp)$Stage_name <- pData(mrexp)$Stage_name %>% 
      ifelse(. %in% c("Raw influent","Influent post screening"),"Influent",.) %>% 
      factor(levels =c("Influent", "Activated Sludge Reactor", "Post aeration", 
                       "Secondary clarifier", "Cell B","Effluent"))

plant_zigFit <- function(mrexp, plant){
    mrexp_plant <- mrexp[,which(pData(mrexp)$plant_name == plant)]
    pData(mrexp_plant)$Stage_name <- factor(pData(mrexp_plant)$Stage_name)
    mrexp_plant <- cumNorm(mrexp_plant, p = 0.75)
    mrexp_plant <- filterData(mrexp_plant, 
                              present = floor(dims(mrexp_plant)[2]/2), 
                              depth = 1)
    # mrexp_plant <- cumNorm(mrexp_plant, p = 0.75)
    pd <- pData(mrexp_plant)
    
    settings = zigControl(maxit = 1, verbose = FALSE)
    mod = model.matrix(~Stage_name, data = pd)
    colnames(mod) = levels(pd$Stage_name)

    res = fitZig(obj = mrexp_plant, mod = mod, control = settings)
    zigFit = res$fit
    finalMod = res$fit$design
    fit2 = eBayes(zigFit)
    top_tbl <- topTableF(fit2,p.value = 0.075) 
    top_feat <- top_tbl %>% rownames()

    count_tbl <- MRcounts(mrexp_plant)
    count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
    count_df <- count_tbl %>% as.data.frame() %>%
        rownames_to_column(var = "genus") %>%
        gather("Seq_ID", "Count",-genus) %>% left_join(pd)

   list(count_df = count_df, top_tbl = top_tbl)
}

Figure 4-7

make_ww_tp_plot <- function(plant_zig){
    ggplot(plant_zig$count_df) +
                geom_point(aes(x = Stage_name, y = Count, color = Stage_name)) +
                facet_wrap(~genus, nrow = 1) +
                theme_bw() + scale_y_log10() +
                labs(y = "Rel. Abu.", color = "Treatment Process") +
                theme_bw() +
              theme(axis.text.x = element_blank(),
                    axis.title.x = element_blank(),
                    strip.text = element_text(face = "italic"))
}

ww_tp <- pData(mrexp)$plant_name %>% 
    .[!grepl("SI", .)] %>% unique() %>% as.character()

ww_tp
## [1] "Mid-Atlantic WWTP1" "Midwest WWTP1"      "Midwest WWTP2"     
## [4] "Mid-Atlantic WWTP2"
ww_tp_plots <- ww_tp %>% set_names(ww_tp) %>% 
    map(plant_zigFit, mrexp = mrexp) %>% map(make_ww_tp_plot)

ggarrange(ww_tp_plots[[1]], ww_tp_plots[[4]], 
          ww_tp_plots[[2]], ww_tp_plots[[3]],
          labels = c("A", "B", "C", "D"),
          ncol = 1, nrow = 4,legend = "bottom")

ggsave("diff_abu_treat_process.tiff", width = 10, height = 8, dpi = 300)
for(i in ww_tp){
    plant_zig <- plant_zigFit(mrexp, i)
    print(paste("##### ",i))
    print(kable(plant_zig$top_tbl))
}

[1] “##### Mid-Atlantic WWTP1”

Influent Activated.Sludge.Reactor Secondary.clarifier Effluent scalingFactor AveExpr F P.Value adj.P.Val
Clostridium 4.6550534 2.6882460 -0.1768883 0.1049857 0.1038452 5.191797 407.3712 0 0
Streptococcus 4.4214939 -3.9155657 -4.1521202 -4.1548620 5.1290678 4.365203 257.5267 0 0
Psychrobacter 9.8822174 -4.7759110 -1.6178157 -5.9338067 -4.2300135 5.003581 208.9112 0 0
Blautia 3.6712287 -3.8071717 -3.5503492 -3.2899341 4.1688382 3.503733 204.7747 0 0
Bifidobacterium 4.5597415 -3.5621441 -3.6889689 -3.8951910 3.7574605 4.042504 186.6297 0 0
Mycobacterium 2.3034496 0.9282716 0.1477461 0.5542501 3.6303122 4.450662 174.5037 0 0
Hyphomicrobium -0.3471074 1.8481416 2.1486294 0.6735379 5.2495607 3.166733 141.4506 0 0
Legionella 0.5239134 1.2837241 2.9426167 4.0826619 1.8743828 3.297784 145.0902 0 0
Collinsella 3.7740783 -3.4214271 -3.4582399 -3.0507862 2.6595376 3.010166 122.9244 0 0
Halomonas 1.6081618 -0.7032213 -1.3810141 -0.2411062 3.6602712 2.978726 116.0583 0 0
[1] “##### Midwes t WWTP1“
Influent Post.aeration Secondary.clarifier Effluent scalingFactor AveExpr F P.Value adj.P.Val
Pseudomonas -23.630533 0.105302 -8.240840 -20.4045226 77.719383 6.227538 61.95803 0 0
Clostridium 2.122473 -5.399842 -3.374173 -2.2840493 8.759675 4.389424 41.76208 0 0
Microbacterium 1.379457 -2.054133 -1.623306 -1.1874663 7.927465 4.291348 34.49602 0 0
Halomonas 12.769173 1.104469 3.076655 5.2344307 -22.164137 4.303394 34.43027 0 0
Trichococcus -4.879886 -3.818507 -6.040672 -11.0197100 27.417079 3.624221 31.67985 0 0
Legionella -1.242781 1.187796 1.363266 0.6326445 9.776053 3.959564 31.25445 0 0
Acinetobacter 2.482344 -1.647600 -3.285401 -0.8741490 5.184652 3.997756 30.75649 0 0
Streptococcus -2.997110 -5.007819 -4.187472 -8.1253405 21.395387 3.398336 29.64021 0 0
Paracoccus -0.816102 -4.422484 -3.479822 -3.7610971 14.001570 3.650518 29.04346 0 0
Bifidobacterium -2.763916 -4.975682 -4.585847 -8.0060442 20.686219 3.281978 28.11082 0 0
[1] “##### Midwes t WWTP2“
Influent Cell.B Effluent scalingFactor AveExpr F P.Value adj.P.Val
Pseudomonas 12.9178954 2.1696692 4.0452340 -18.694367 7.610422 144.71594 0 0
Flavobacterium 5.9596378 6.1110278 8.4128520 -13.563307 6.114826 94.09863 0 0
Massilia 4.9578066 5.0081413 5.0723110 -8.310680 5.525375 73.30007 0 0
Psychrobacter 8.4670006 -2.7960862 -0.3766897 -6.180307 4.621402 57.07140 0 0
Mycobacterium 2.2069420 1.0629662 1.1102633 3.730231 4.631318 51.48076 0 0
Janthinobacterium 7.9092202 -3.0516411 -0.7035015 -4.968901 4.335694 50.52536 0 0
Chryseobacterium 9.8236404 0.0163297 1.3037310 -15.534458 3.875554 47.46053 0 0
Leifsonia 0.2850388 3.9970141 3.4387455 1.608575 3.929936 42.24056 0 0
Sporosarcina 2.4634713 11.3966017 11.9956610 -16.587045 4.905456 40.36015 0 0
Carnobacterium 8.9967568 -0.7213852 3.3700771 -17.488266 2.765606 38.88950 0 0
[1] “##### Mid-Atla ntic WWTP2“
Influent Activated.Sludge.Reactor Secondary.clarifier Effluent scalingFactor AveExpr F P.Value adj.P.Val
Pseudomonas 15.3533564 -8.293934 -0.2650290 -6.0221102 -17.1186409 5.602349 161.39890 0 0
Massilia 0.5722508 -1.261907 0.8698702 0.0849137 11.8001019 4.686981 114.59145 0 0
Clostridium 11.6378758 -6.372408 -0.6589032 -9.0803038 -11.5118755 3.960429 97.46035 0 0
Sphingomonas 2.7455555 -3.374304 0.0378776 0.0963023 6.9253092 4.295542 95.36705 0 0
Janthinobacterium 9.0873303 -8.581755 -3.6969893 -8.6307795 -2.0549787 3.590749 94.78396 0 0
Deinococcus 0.4011576 -1.057163 0.6554114 -2.7612105 10.6228229 3.688549 82.65160 0 0
Flavobacterium 6.8842755 -6.816087 -2.6880756 -2.5224507 -0.2771630 3.706567 77.00756 0 0
Halomonas -1.5140567 4.301894 0.4595413 5.2666151 8.8787726 3.862520 76.70902 0 0
Pedobacter 4.0860873 -4.081131 -1.2957918 -1.9527985 4.4869372 3.854354 76.42857 0 0
Chryseobacterium 6.8537808 -6.678815 -1.7132320 -6.6957809 -0.7111724 3.238090 71.54076 0 0

WWTP to SI site

Supplemental Figure S5 - Alpha diversity

spray_div <- ww_div_df %>% filter( WW_TP %in% c("MA_TP1","MA_SI1")) %>% 
    filter(Stage %in% c("Influent","Effluent", "BeforeUVTreatment", 
                        "AfterUVTreatment","HoldingPondInlet", 
                        "PumpHouseInlet")) %>% 
    group_by(plant_name, Stage, StageID, Date, diversity) %>% 
    summarise(metric = median(metric))
spray_div %>% ungroup() %>% 
    mutate(Stage = factor(Stage,levels = c("Influent","Effluent","BeforeUVTreatment", 
                        "AfterUVTreatment","HoldingPondInlet", 
                        "PumpHouseInlet"))) %>% 
    ggplot(aes(x = Stage, y = metric)) +
        geom_line(aes(group = Date), color = "grey60") +
        geom_point(aes(x = Stage, y = metric)) +
        facet_grid(diversity~., scale = "free") +
        labs(y = "Diversity Metric", color = "WWTP") + theme_bw() +
        theme(axis.text.x = element_text(angle = 90))

Testing for differences

For shannon and OTU number there are significant differences between influent, effluent, and spray irrigation stages.

fit_aov <- function(df) aov(metric~Stage, data = df)
spray_div_test <- spray_div %>% group_by(diversity) %>% 
    nest() %>% mutate(aov_mod = map(data, fit_aov))

aov_summary <- spray_div_test$aov_mod %>% map_df(glance) %>% 
    rename(f_stat = statistic, f_p = p.value) 
aov_summary$diversity <- spray_div_test$diversity

aov_summary %>% select(diversity, logLik, AIC,BIC, f_stat, f_p) %>% kable()
diversity logLik AIC BIC f_stat f_p
shannon -22.1850 58.3700 67.17668 3.569642 0.0180853
simpson 117.2448 -220.4895 -211.68283 1.692608 0.1824067
specnumber -191.8790 397.7580 406.56472 6.154450 0.0013136

ANOVA Diagnostic plots

div_metrics <- c("shannon","simpson","specnumber")
for(i in 1:3){
    print(autoplot(spray_div_test$aov_mod[[i]]) + 
              ggtitle(paste0(div_metrics[i])))
}

Testing for pairwise differences.

for(i in c("shannon", "specnumber")){
    print(i)
    tukey <- spray_div %>% filter(diversity == i) %>% {aov(metric~Stage, data = .)} %>% TukeyHSD()
    tidy(tukey) %>% filter(adj.p.value < 0.1) %>% print()
}
## [1] "shannon"
##    term                comparison estimate   conf.low conf.high
## 1 Stage Influent-HoldingPondInlet 1.374929 0.06097935   2.68888
##   adj.p.value
## 1  0.03698308
## [1] "specnumber"
##    term                 comparison estimate conf.low conf.high adj.p.value
## 1 Stage  Influent-AfterUVTreatment 1271.125 287.8470  2254.403 0.006898620
## 2 Stage Influent-BeforeUVTreatment 1161.500 178.2220  2144.778 0.014936608
## 3 Stage  Influent-HoldingPondInlet 1384.083 486.4774  2281.689 0.001193148

Relative Abundance

mrexp_spray <- plantObj[,which(pData(plantObj)$WW_TP %in% c("MA_TP1","MA_SI1") & 
                              pData(plantObj)$Stage %in% c("RawInfluent","PostScreeningInfluent","Effluent",
                                                            "BeforeUVTreatment", "AfterUVTreatment",
                                                            "HoldingPondInlet","PumpHouseInlet"))]

pData(mrexp_spray)$Stage_name <- pData(mrexp_spray)$Stage %>% 
      ifelse(. %in% c("RawInfluent","PostScreeningInfluent"),"Influent",.) %>% 
      factor(levels =c("Influent","Effluent", "BeforeUVTreatment", "AfterUVTreatment",
                       "HoldingPondInlet","PumpHouseInlet"))

mrexp_spray <- aggregateByTaxonomy(mrexp_spray, 
                                 lvl = "genus", 
                                 alternate = TRUE, 
                                 norm = TRUE, log = TRUE)

mrexp_spray <- cumNorm(mrexp_spray, p = 0.75)
mrexp_spray <- filterData(mrexp_spray, present = 7, depth = 1)
# mrexp_spray <- cumNorm(mrexp_spray, p = 0.75)
s <- normFactors(mrexp_spray)
pd <- pData(mrexp_spray)

settings = zigControl(maxit = 1, verbose = FALSE)
mod = model.matrix(~Stage_name, data = pd)
colnames(mod) = levels(pd$Stage_name)
res = fitZig(obj = mrexp_spray, mod = mod, control = settings)
zigFit = res$fit
fit2 <- eBayes(zigFit)
top_tbl <- topTableF(fit2,p.value = 0.075) 
top_feat <- top_tbl %>% rownames()
count_tbl <- MRcounts(mrexp_spray)
count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
count_df <- count_tbl %>% as.data.frame() %>% 
    rownames_to_column(var = "genus") %>% 
    gather("Seq_ID", "Count",-genus) %>% left_join(pd) %>% 
    mutate(Stage = factor(Stage,levels = c("RawInfluent","Effluent","BeforeUVTreatment", 
                    "AfterUVTreatment","HoldingPondInlet", 
                    "PumpHouseInlet")))
kable(top_tbl)
Influent Effluent BeforeUVTreatment AfterUVTreatment HoldingPondInlet PumpHouseInlet scalingFactor AveExpr F P.Value adj.P.Val
Clostridium 3.5570660 -1.6141927 -1.9595022 -1.0891890 -1.7437475 -1.3724789 4.3545657 4.223864 156.58071 0 0
Mycobacterium 2.0651934 0.1811994 0.0985015 0.8622324 0.5073317 -0.4457880 4.5526913 4.427186 127.04375 0 0
Shewanella 1.7063515 1.3552535 2.4574504 1.8138119 2.3949668 1.0959432 -0.0750964 3.244486 78.74273 0 0
Halomonas 3.1254354 2.1345714 4.1157692 3.0225705 4.1584830 1.7392195 -2.2136631 4.687832 76.80866 0 0
Streptococcus 5.2933433 -2.7897601 -3.5008617 -3.3645001 -3.1790217 -5.3471901 1.7538123 2.698989 85.32113 0 0
Legionella -0.5004655 2.6120910 2.0660638 2.8075892 1.6454463 0.2716772 5.6389230 3.580591 77.12593 0 0
Gemmobacter 0.4679140 0.6253217 -0.3641536 0.3102889 -0.5985290 1.6409083 2.8580978 2.124359 72.43150 0 0
Ralstonia 0.4161509 1.9270323 3.1197301 2.6767776 3.0156011 1.3647255 -0.1557410 2.400292 65.62172 0 0
Pseudomonas 10.0054000 -4.4181942 -6.5006839 -6.3951256 -5.7633871 -2.8711371 -1.4186639 4.800067 61.90357 0 0
Rhodobacter 1.1178660 -0.9660405 -1.1823087 -0.4506051 -0.8821008 1.1792098 4.8011202 3.099284 65.67000 0 0
ggplot(count_df) + 
    geom_line(aes(x = Stage, y = Count, group = Date), color = "grey60") + 
    geom_point(aes(x = Stage, y = Count)) + 
    facet_wrap(~genus, nrow = 2) +
    theme_bw() + scale_y_log10() + labs(y = "Relative Abundance") +
    theme(axis.text.x = element_text(angle = 90))

Session information

import_biom2 function form joe_diversity_function.R script.

import_biom2
## function (x, treefilename = NULL, refseqfilename = NULL, refseqFunction = readDNAStringSet, 
##     refseqArgs = NULL, parseFunction = parse_taxonomy_default, 
##     parallel = FALSE, version = 1, ...) 
## {
##     argumentlist <- list()
##     x = biom(x)
##     b_data = biom_data(x)
##     b_data_mat = as(b_data, "matrix")
##     otutab = otu_table(b_data_mat, taxa_are_rows = TRUE)
##     argumentlist <- c(argumentlist, list(otutab))
##     if (all(sapply(sapply(x$rows, function(i) {
##         i$metadata
##     }), is.null))) {
##         taxtab <- NULL
##     }
##     else {
##         taxlist = lapply(x$rows, function(i) {
##             parseFunction(i$metadata$taxonomy)
##         })
##         names(taxlist) = sapply(x$rows, function(i) {
##             i$id
##         })
##         taxtab = build_tax_table(taxlist)
##     }
##     argumentlist <- c(argumentlist, list(taxtab))
##     if (is.null(sample_metadata(x))) {
##         samdata <- NULL
##     }
##     else {
##         samdata = sample_data(sample_metadata(x))
##     }
##     argumentlist <- c(argumentlist, list(samdata))
##     if (!is.null(treefilename)) {
##         if (inherits(treefilename, "phylo")) {
##             tree = treefilename
##         }
##         else {
##             tree <- read_tree(treefilename, ...)
##         }
##         if (is.null(tree)) {
##             warning("treefilename failed import. It not included.")
##         }
##         else {
##             argumentlist <- c(argumentlist, list(tree))
##         }
##     }
##     if (!is.null(refseqfilename)) {
##         if (inherits(refseqfilename, "XStringSet")) {
##             refseq = refseqfilename
##         }
##         else {
##             if (!is.null(refseqArgs)) {
##                 refseq = do.call("refseqFunction", c(list(refseqfilename), 
##                   refseqArgs))
##             }
##             else {
##                 refseq = refseqFunction(refseqfilename)
##             }
##         }
##         argumentlist <- c(argumentlist, list(refseq))
##     }
##     return(do.call("phyloseq", argumentlist))
## }
s_info <- devtools::session_info()
print(s_info$platform)
##  setting  value                       
##  version  R version 3.4.4 (2018-03-15)
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/New_York            
##  date     2018-05-05
kable(s_info$packages)
package * version date source
ade4 1.7-11 2018-04-05 CRAN (R 3.4.4)
ape 5.1 2018-04-04 CRAN (R 3.4.4)
assertthat 0.2.0 2017-04-11 CRAN (R 3.4.0)
backports 1.1.2 2017-12-13 CRAN (R 3.4.3)
base * 3.4.4 2018-03-15 local
bindr 0.1.1 2018-03-13 CRAN (R 3.4.4)
bindrcpp * 0.2.2 2018-03-29 CRAN (R 3.4.4)
Biobase * 2.38.0 2017-10-31 Bioconductor
BiocGenerics * 0.24.0 2017-10-31 Bioconductor
biomformat * 1.6.0 2017-10-31 Bioconductor
Biostrings 2.46.0 2017-10-31 Bioconductor
bitops 1.0-6 2013-08-17 CRAN (R 3.4.0)
broom * 0.4.4 2018-03-29 CRAN (R 3.4.4)
caTools 1.17.1 2014-09-10 CRAN (R 3.4.0)
cluster 2.0.7 2018-04-01 CRAN (R 3.4.4)
codetools 0.2-15 2016-10-05 CRAN (R 3.4.4)
colorspace 1.3-2 2016-12-14 CRAN (R 3.4.0)
compiler 3.4.4 2018-03-15 local
cowplot 0.9.2 2017-12-17 cran (@0.9.2)
data.table 1.10.4-3 2017-10-27 cran (@1.10.4-)
datasets * 3.4.4 2018-03-15 local
devtools * 1.13.5 2018-02-18 CRAN (R 3.4.3)
digest 0.6.15 2018-01-28 cran (@0.6.15)
dplyr * 0.7.4 2017-09-28 cran (@0.7.4)
DT * 0.4 2018-01-30 cran (@0.4)
evaluate 0.10.1 2017-06-24 CRAN (R 3.4.1)
foreach * 1.4.4 2017-12-12 CRAN (R 3.4.3)
foreign 0.8-69 2017-06-22 CRAN (R 3.4.4)
gdata 2.18.0 2017-06-06 CRAN (R 3.4.0)
ggExtra * 0.8 2018-04-04 CRAN (R 3.4.4)
ggfortify * 0.4.4 2018-04-18 CRAN (R 3.4.4)
ggplot2 * 2.2.1.9000 2018-04-05 Github (hadley/ggplot2@3c9c504)
ggpubr * 0.1.6 2017-11-14 cran (@0.1.6)
glmnet * 2.0-16 2018-04-02 CRAN (R 3.4.4)
glue 1.2.0 2017-10-29 cran (@1.2.0)
gplots 3.0.1 2016-03-30 CRAN (R 3.4.0)
graphics * 3.4.4 2018-03-15 local
grDevices * 3.4.4 2018-03-15 local
grid 3.4.4 2018-03-15 local
gridExtra 2.3 2017-09-09 cran (@2.3)
gtable 0.2.0 2016-02-26 CRAN (R 3.4.0)
gtools 3.5.0 2015-05-29 CRAN (R 3.4.0)
highr 0.6 2016-05-09 CRAN (R 3.4.0)
hms 0.4.2 2018-03-10 CRAN (R 3.4.4)
htmltools 0.3.6 2017-04-28 CRAN (R 3.4.0)
htmlwidgets 1.0 2018-01-20 cran (@1.0)
httpuv 1.3.6.2 2018-03-02 CRAN (R 3.4.3)
httr 1.3.1 2017-08-20 CRAN (R 3.4.1)
igraph 1.2.1 2018-03-10 CRAN (R 3.4.4)
iNEXT * 2.0.12 2016-11-12 CRAN (R 3.4.0)
IRanges 2.12.0 2017-10-31 Bioconductor
iterators 1.0.9 2017-12-12 CRAN (R 3.4.3)
jsonlite 1.5 2017-06-01 CRAN (R 3.4.0)
KernSmooth 2.23-15 2015-06-29 CRAN (R 3.4.4)
knitr * 1.20 2018-02-20 CRAN (R 3.4.3)
labeling 0.3 2014-08-23 CRAN (R 3.4.0)
lattice * 0.20-35 2017-03-25 CRAN (R 3.4.4)
lazyeval 0.2.1 2017-10-29 cran (@0.2.1)
limma * 3.34.9 2018-02-22 Bioconductor
magrittr * 1.5 2014-11-22 CRAN (R 3.4.0)
MASS 7.3-49 2018-02-23 CRAN (R 3.4.4)
Matrix * 1.2-13 2018-04-02 CRAN (R 3.4.4)
matrixStats 0.53.1 2018-02-11 cran (@0.53.1)
memoise 1.1.0 2017-04-21 CRAN (R 3.4.0)
metagenomeSeq * 1.20.1 2017-12-12 Bioconductor
methods * 3.4.4 2018-03-15 local
mgcv 1.8-23 2018-01-21 CRAN (R 3.4.4)
mime 0.5 2016-07-07 CRAN (R 3.4.0)
miniUI 0.1.1 2016-01-15 cran (@0.1.1)
mnormt 1.5-5 2016-10-15 CRAN (R 3.4.0)
multtest 2.34.0 2017-10-31 Bioconductor
munsell 0.4.3 2016-02-13 CRAN (R 3.4.0)
nlme 3.1-131.1 2018-02-16 CRAN (R 3.4.4)
parallel * 3.4.4 2018-03-15 local
permute * 0.9-4 2016-09-09 CRAN (R 3.4.0)
phyloseq * 1.22.3 2017-11-07 Bioconductor
pillar 1.2.1 2018-02-27 CRAN (R 3.4.3)
pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.0)
plotly * 4.7.1 2017-07-29 CRAN (R 3.4.1)
plyr 1.8.4 2016-06-08 CRAN (R 3.4.0)
psych 1.8.3.3 2018-03-30 CRAN (R 3.4.4)
purrr * 0.2.4 2017-10-18 CRAN (R 3.4.2)
R6 2.2.2 2017-06-17 CRAN (R 3.4.0)
RColorBrewer * 1.1-2 2014-12-07 CRAN (R 3.4.0)
Rcpp 0.12.16 2018-03-13 cran (@0.12.16)
readr * 1.1.1 2017-05-16 CRAN (R 3.4.0)
reshape2 1.4.3 2017-12-11 cran (@1.4.3)
rhdf5 2.22.0 2017-10-31 Bioconductor
rlang 0.2.0.9001 2018-04-05 Github (r-lib/rlang@9c0637a)
rmarkdown 1.9 2018-03-01 CRAN (R 3.4.3)
rprojroot 1.3-2 2018-01-03 CRAN (R 3.4.3)
S4Vectors 0.16.0 2017-10-31 Bioconductor
scales 0.5.0.9000 2018-04-05 Github (hadley/scales@d767915)
shiny 1.0.5 2017-08-23 CRAN (R 3.4.1)
splines 3.4.4 2018-03-15 local
stats * 3.4.4 2018-03-15 local
stats4 3.4.4 2018-03-15 local
stringi 1.1.7 2018-03-12 CRAN (R 3.4.4)
stringr * 1.3.0 2018-02-19 cran (@1.3.0)
survival 2.41-3 2017-04-04 CRAN (R 3.4.4)
tibble * 1.4.2 2018-01-22 cran (@1.4.2)
tidyr * 0.8.0 2018-01-29 cran (@0.8.0)
tidyselect 0.2.4 2018-02-26 cran (@0.2.4)
tools 3.4.4 2018-03-15 local
utils * 3.4.4 2018-03-15 local
vegan * 2.4-6 2018-01-24 CRAN (R 3.4.3)
viridisLite 0.3.0 2018-02-01 cran (@0.3.0)
withr 2.1.2 2018-04-05 Github (jimhester/withr@79d7b0d)
xtable 1.8-2 2016-02-05 CRAN (R 3.4.0)
XVector 0.18.0 2017-10-31 Bioconductor
yaml 2.1.18 2018-03-08 cran (@2.1.18)
zlibbioc 1.24.0 2017-10-31 Bioconductor